SHEA Spring 2024 Data Visualization Demo

Matthew Ziegler

2024-03-26

Overview

R Introduction

R Introduction

R Introduction

Getting Started

library(dplyr)
library(ggplot2)
library(gt)

Piping Data

data %>% new_data %>% tables

data %>% different_data %>% figures

Loading our Data

dat_cdiff <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/HAICViz_-_CDI_20240214.csv") %>%
    janitor::clean_names() %>%
    filter(topic == "Case rates (cases per 100,000)", series == "Community-associated" |
        series == "Healthcare-associated")
head(dat_cdiff)
##   year_name                          topic view_by  grouping
## 1      2011 Case rates (cases per 100,000)   Total Epi Class
## 2      2012 Case rates (cases per 100,000)   Total Epi Class
## 3      2013 Case rates (cases per 100,000)   Total Epi Class
## 4      2014 Case rates (cases per 100,000)   Total Epi Class
## 5      2015 Case rates (cases per 100,000)   Total Epi Class
## 6      2016 Case rates (cases per 100,000)   Total Epi Class
##                 series value
## 1 Community-associated 48.16
## 2 Community-associated 52.88
## 3 Community-associated 55.75
## 4 Community-associated 57.83
## 5 Community-associated 65.81
## 6 Community-associated 67.20

Let’s do a simple graph

dat_cdiff_line_plot <- dat_cdiff %>%
    ggplot(aes(x = as.factor(year_name), y = value, group = series)) + geom_line(aes(linetype = series)) +
    labs(title = "Cases by year", y = "CDI cases per 1000 individuals", x = "Year")

Let’s do a simple graph

Let’s do a simple graph

dat_cdiff %>%
    ggplot(aes(x = as.factor(year_name), y = value, fill = series)) + geom_col(position = "dodge") +
    labs(title = "Cases by year", y = "CDI cases per 1000 individuals", x = "Year")

A little more complicated

library(tidyr)
dat_cdiff_cat_plot <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/HAICViz_-_CDI_20240214.csv") %>%
    janitor::clean_names() %>%
    filter(topic == "Case rates (cases per 100,000)") %>%
    mutate(cat = case_when(grepl("HA|CA", series) == TRUE & grepl("years", series) ==
        TRUE ~ "age", grepl("Male|Female", series) == TRUE & grepl("HA|CA", series) ==
        TRUE ~ "sex", grepl("White|Non-white", series) == TRUE & grepl("HA|CA", series) ==
        TRUE ~ "race")) %>%
    filter(!is.na(cat)) %>%
    separate(series, into = c("category", "group"), sep = " - ") %>%
    ggplot(aes(x = as.factor(year_name), y = value, group = interaction(group, category),
        linetype = category, col = group, )) + geom_line(lwd = 1) + theme_minimal() +
    theme(axis.text.x = element_text(angle = 75, vjust = 0, hjust = 0)) + facet_wrap(vars(cat)) +
    labs(title = "C.difficile Infection by Year", y = "Case rates (cases per 100,000)",
        x = "Year")

A little more complicated

dat_cdiff_cat_plot

Highlighting and annotating

library(gghighlight)
dat_mdrgn <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/HAICViz_-_MuGSI_20240330.csv") %>%
    janitor::clean_names() %>%
    mutate(keep = case_when(viewby == "Organism" & series != "All cases" ~ 1, organism ==
        "CRAB" & viewby == "All cases" & topic == "Case Rates" ~ 1, TRUE ~ 0)) %>%
    filter(keep == 1) %>%
    mutate(series = ifelse(organism == "CRAB", "Acinetobacter baumanii", series))
dat_mdrgn_plot <- dat_mdrgn %>%
    ggplot(aes(x = as.factor(year_name), y = value, group = series)) + geom_line(aes(linetype = series)) +
    theme(axis.text.x = element_text(angle = 75, vjust = 0, hjust = 0)) + labs(title = "Cases by year - Carbapenem-Resistant GNB",
    y = "Cases per 1000 individuals", x = "Year") + gghighlight(series == "Acinetobacter baumanii") +
    theme_minimal()

Animated figures

library(magick)
library(gganimate)
library(maps)
library(tidyr)
respi <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/Outpatient_Respiratory_Illness_Activity_Map_20240401.csv") %>%
    janitor::clean_names() %>%
    mutate(region = tolower(state)) %>%
    separate(activity_level, into = c(NA, "level"), sep = " ") %>%
    mutate(level = as.numeric(level)) %>%
    filter(season == "2022-2023")

states <- map_data("state")
region_dat_respi <- left_join(states, respi, by = "region")
gif_a <- region_dat_respi %>%
    ggplot(., aes(long, lat, group = group)) + geom_polygon(aes(fill = level), colour = alpha("white",
    1/2), size = 0.05) + geom_polygon(data = states, colour = "black", fill = NA) +
    scale_fill_gradientn(colours = terrain.colors(6)) + theme_void() + transition_time(week) +
    labs(title = "Respiratory Infection Activity 22-23 Season: Week {frame_time}") +
    theme_minimal()

gif_a <- animate(gif_a, width = 700, height = 480)

Animate

gif_b <- region_dat_respi %>%
    # filter(!is.na(value)) %>%
ggplot(data = ., aes(y = level)) + geom_boxplot() + labs(x = "", title = "National Value") +
    theme(axis.text.x = element_blank()) + transition_time(week)
# enter_fade() + exit_shrink() + ease_aes('sine-in-out')

gif_b <- animate(gif_b, width = 600, height = 480)

Animate

a_mgif <- image_read(gif_a)
b_mgif <- image_read(gif_b)

new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 1:95) {
    combined <- image_append(c(a_mgif[i], b_mgif[i]))
    new_gif <- c(new_gif, combined)
}

Plotting Model Output

library(gtsummary)
library(marginaleffects)

latitude_by_states <- states %>%
    group_by(region) %>%
    summarise(mean_lat = mean(lat))

Plotting Model Output

vaccination <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/Vaccination_Coverage_among_Health_Care_Personnel_20240401.csv") %>%
    janitor::clean_names() %>%
    mutate(year = as.numeric(substr(season, 1, 4))) %>%
    mutate(region = tolower(geography)) %>%
    left_join(latitude_by_states, by = "region") %>%
    rename(latitude = mean_lat) %>%
    filter(personnel_type != "All Health Care Personnel")

Plotting Model Output

model <- lm(estimate ~ year + latitude + personnel_type, dat = vaccination)

Plotting Model Output

tbl_regression(model)
Characteristic Beta 95% CI1 p-value
year 1.2 0.96, 1.5 <0.001
latitude 0.34 0.22, 0.47 <0.001
personnel_type
    Adult Students/Trainees and Volunteers
    Employees 3.2 1.7, 4.6 <0.001
    Licensed Independent Practitioners -14 -16, -13 <0.001
1 CI = Confidence Interval

Plotting Model Output

plot_predictions(model, condition = "year") + labs(y = "Estimated Proportion: Vaccine Compliance",
    x = "Year", title = "Estimated Vaccine Compliance by Year") + theme_minimal()

Plotting Model Output

What Else?

Conclusions

Resources

Citations